<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="utf-8">
  
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
  <!-- Zoom is intentionally left enabled (no maximum-scale / user-scalable=no) so readers can pinch-zoom the page. -->
  <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
  <meta name="renderer" content="webkit">
  <meta http-equiv="Cache-Control" content="no-transform" />
  <meta http-equiv="Cache-Control" content="no-siteapp" />
  <meta name="apple-mobile-web-app-capable" content="yes">
  <meta name="apple-mobile-web-app-status-bar-style" content="black">
  <!-- Ask mobile browsers not to auto-link phone numbers, e-mail addresses or street addresses -->
  <meta name="format-detection" content="telephone=no,email=no,address=no">
  <!-- Color theme for statusbar -->
  <meta name="theme-color" content="#000000" />
  <!-- 强制页面在当前窗口以独立页面显示,防止别人在框架里调用页面 -->
  <meta http-equiv="window-target" content="_top" />
  
  
  <title>ObjectDetection(8)_SSD | 鲨鱼之家</title>
  <meta name="description" content="预备 什么是SSD SSD的框架  理解的关键  Default Boxes 预测框内物体类别和框位置 为什么叫做多框   基架 添加 整体结构   训练时的部分措施  难例挖掘 数据增强   实验结果  实验结果 对照实验   预测时的后处理 相关工作   预备  FPS：Frames per Second，衡量预测速度，越大越好 coarse feature maps：粗略特征图 SSD3">
<meta property="og:type" content="article">
<meta property="og:title" content="ObjectDetection(8)_SSD">
<meta property="og:url" content="http://tina-yao.gitee.io/bigbig-shark/2022/08/18/ObjectDetection-8-SSD/index.html">
<meta property="og:site_name" content="大鲨鱼">
<meta property="og:description" content="预备 什么是SSD SSD的框架  理解的关键  Default Boxes 预测框内物体类别和框位置 为什么叫做多框   基架 添加 整体结构   训练时的部分措施  难例挖掘 数据增强   实验结果  实验结果 对照实验   预测时的后处理 相关工作   预备  FPS：Frames per Second，衡量预测速度，越大越好 coarse feature maps：粗略特征图 SSD3">
<meta property="og:locale" content="en_US">
<meta property="og:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/1.png">
<meta property="og:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/3.png">
<meta property="og:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/4.png">
<meta property="og:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/5.png">
<meta property="og:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/6.png">
<meta property="og:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/2.png">
<meta property="article:published_time" content="2022-08-18T03:33:56.000Z">
<meta property="article:modified_time" content="2022-09-27T16:48:41.546Z">
<meta property="article:author" content="BigbigShark">
<meta property="article:tag" content="目标检测">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/1.png">
  <!-- Canonical links -->
  <link rel="canonical" href="http://tina-yao.gitee.io/bigbig-shark/2022/08/18/ObjectDetection-8-SSD/index.html">
  
    <link rel="alternate" href="/atom.xml" title="大鲨鱼" type="application/atom+xml">
  
  
    <!-- Favicon: served from the site root (/bigbig-shark/) like the other same-site assets; MIME type matches the .png file -->
    <link rel="icon" href="/bigbig-shark/favicon.png" type="image/png">
  
  
<link rel="stylesheet" href="/bigbig-shark/css/style.css">

  
  
  
  
<meta name="generator" content="Hexo 5.4.0"></head>


<body class="main-center theme-purple" itemscope itemtype="http://schema.org/WebPage">
  <header class="header" itemscope itemtype="http://schema.org/WPHeader">
  <div class="slimContent">
    <div class="navbar-header">
      
      
      <div class="profile-block text-center">
        <!-- Avatar links to the blog home page; the image alt gives the link its accessible name -->
        <a id="avatar" href="https://tina-yao.gitee.io/bigbig-shark/" target="_blank" rel="noopener">
          <img class="img-circle img-rotate" src="/bigbig-shark/images/avatar.jpg" width="200" height="200" alt="大鲨鱼">
        </a>
        <h2 id="name" class="hidden-xs hidden-sm">大鲨鱼</h2>
        <h3 id="title" class="hidden-xs hidden-sm hidden-md">CV&amp;Robots</h3>
        <small id="location" class="text-muted hidden-xs hidden-sm"><i class="icon icon-map-marker"></i> Wuhan, China</small>
      </div>
      
      <div class="search" id="search-form-wrap">

    <!-- Sidebar search box. The submit button's onclick returns false, so the form itself is never
         submitted; NOTE(review): the search is presumably driven by the page's search script — confirm. -->
    <form class="search-form sidebar-form" role="search">
        <div class="input-group">
            <!-- placeholder is not a label, so aria-label provides the accessible name -->
            <input type="text" class="search-form-input form-control" placeholder="Search" aria-label="Search">
            <span class="input-group-btn">
                <button type="submit" class="search-form-submit btn btn-flat" onclick="return false;" aria-label="Search"><i class="icon icon-search" aria-hidden="true"></i></button>
            </span>
        </div>
    </form>
    <div class="ins-search">
  <div class="ins-search-mask"></div>
  <div class="ins-search-container">
    <div class="ins-input-wrapper">
      <input type="text" class="ins-search-input" placeholder="Type something..." x-webkit-speech />
      <button type="button" class="close ins-close ins-selectable" data-dismiss="modal" aria-label="Close"><span aria-hidden="true">×</span></button>
    </div>
    <div class="ins-section-wrapper">
      <div class="ins-section-container"></div>
    </div>
  </div>
</div>


</div>
      <button class="navbar-toggle collapsed" type="button" data-toggle="collapse" data-target="#main-navbar" aria-controls="main-navbar" aria-expanded="false">
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
    </div>
    <nav id="main-navbar" class="collapse navbar-collapse" itemscope itemtype="http://schema.org/SiteNavigationElement" role="navigation">
      <ul class="nav navbar-nav main-nav menu-highlight">
        
        
        <li class="menu-item menu-item-home">
          <a href="/bigbig-shark/.">
            
            <i class="icon icon-home-fill"></i>
            
            <span class="menu-title">Home</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-archives">
          <a href="/bigbig-shark/archives">
            
            <i class="icon icon-archives-fill"></i>
            
            <span class="menu-title">Archives</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-categories">
          <a href="/bigbig-shark/categories">
            
            <i class="icon icon-folder"></i>
            
            <span class="menu-title">Categories</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-tags">
          <a href="/bigbig-shark/tags">
            
            <i class="icon icon-tags"></i>
            
            <span class="menu-title">Tags</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-links">
          <a href="/bigbig-shark/links">
            
            <i class="icon icon-friendship"></i>
            
            <span class="menu-title">Links</span>
          </a>
        </li>
        
        
        <li class="menu-item menu-item-about">
          <a href="/bigbig-shark/about">
            
            <i class="icon icon-cup-fill"></i>
            
            <span class="menu-title">About</span>
          </a>
        </li>
        
      </ul>
      
	
    <ul class="social-links">
        <!-- Icon-only link: aria-label supplies the accessible name; title is kept for the data-toggle tooltip -->
        <li><a href="https://gitee.com/tina-yao" target="_blank" rel="noopener" title="Gitee" aria-label="Gitee" data-toggle="tooltip" data-placement="top"><i class="icon icon-gitee" aria-hidden="true"></i></a></li>
    </ul>

    </nav>
  </div>
</header>

  
    <aside class="sidebar" itemscope itemtype="http://schema.org/WPSideBar">
  <div class="slimContent">
    
      <div class="widget">
    <h3 class="widget-title">Board</h3>
    <div class="widget-body">
        <div id="board">
            <div class="content">
                <p>欢迎交流与分享经验!</p>
            </div>
        </div>
    </div>
</div>

    
      
  <div class="widget">
    <h3 class="widget-title">Categories</h3>
    <div class="widget-body">
      <ul class="category-list"><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/AI/">AI</a><span class="category-list-count">1</span><ul class="category-list-child"><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/AI/ML/">ML</a><span class="category-list-count">1</span></li></ul></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/Concepts/">Concepts</a><span class="category-list-count">1</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/Robots/">Robots</a><span class="category-list-count">2</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/%E5%8D%8A%E6%97%A5%E9%97%B2/">半日闲</a><span class="category-list-count">5</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/%E6%95%B0%E5%AD%A6%E5%BB%BA%E6%A8%A1/">数学建模</a><span class="category-list-count">1</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90/">数据分析</a><span class="category-list-count">1</span></li><li class="category-list-item"><a class="category-list-link" href="/bigbig-shark/categories/%E8%AE%A1%E7%AE%97%E6%9C%BA%E8%A7%86%E8%A7%89/">计算机视觉</a><span class="category-list-count">30</span></li></ul>
    </div>
  </div>


    
      
  <div class="widget">
    <h3 class="widget-title">Tag Cloud</h3>
    <div class="widget-body tagcloud">
      <a href="/bigbig-shark/tags/CNN-backbones/" style="font-size: 13.67px;">CNN_backbones</a> <a href="/bigbig-shark/tags/OpenCV/" style="font-size: 13px;">OpenCV</a> <a href="/bigbig-shark/tags/Python/" style="font-size: 13px;">Python</a> <a href="/bigbig-shark/tags/ROS/" style="font-size: 13px;">ROS</a> <a href="/bigbig-shark/tags/%E4%BB%A3%E7%A0%81/" style="font-size: 13px;">代码</a> <a href="/bigbig-shark/tags/%E5%85%83%E5%AE%87%E5%AE%99/" style="font-size: 13px;">元宇宙</a> <a href="/bigbig-shark/tags/%E5%85%B4%E8%B6%A3/" style="font-size: 13px;">兴趣</a> <a href="/bigbig-shark/tags/%E5%9C%BA%E6%99%AF%E6%96%87%E5%AD%97%E8%AF%86%E5%88%AB/" style="font-size: 13px;">场景文字识别</a> <a href="/bigbig-shark/tags/%E6%91%98%E6%8A%84/" style="font-size: 13.33px;">摘抄</a> <a href="/bigbig-shark/tags/%E6%95%B0%E6%8D%AE%E9%9B%86/" style="font-size: 13px;">数据集</a> <a href="/bigbig-shark/tags/%E7%9B%AE%E6%A0%87%E6%A3%80%E6%B5%8B/" style="font-size: 14px;">目标检测</a> <a href="/bigbig-shark/tags/%E7%BE%8E%E8%B5%9B/" style="font-size: 13px;">美赛</a> <a href="/bigbig-shark/tags/%E8%81%9A%E7%B1%BB/" style="font-size: 13px;">聚类</a> <a href="/bigbig-shark/tags/%E8%87%AA%E5%8A%A8%E9%A9%BE%E9%A9%B6/" style="font-size: 13.33px;">自动驾驶</a> <a href="/bigbig-shark/tags/%E8%AE%A1%E7%AE%97%E6%9C%BA%E8%A7%86%E8%A7%893D/" style="font-size: 13px;">计算机视觉3D</a> <a href="/bigbig-shark/tags/%E8%BD%BB%E9%87%8F%E7%BA%A7/" style="font-size: 13px;">轻量级</a>
    </div>
  </div>

    
  </div>
</aside>

  
  
<main class="main" role="main">
  <div class="content">
  <article id="post-ObjectDetection-8-SSD" class="article article-type-post" itemscope itemtype="http://schema.org/BlogPosting">
    
    <div class="article-header">
      
        
  
    <h1 class="article-title" itemprop="name">
      ObjectDetection(8)_SSD
    </h1>
  

      
      <div class="article-meta">
        <span class="article-date">
    <i class="icon icon-calendar-check"></i>
	<a href="/bigbig-shark/2022/08/18/ObjectDetection-8-SSD/" class="article-date">
	  <time datetime="2022-08-18T03:33:56.000Z" itemprop="datePublished">2022-08-18</time>
	</a>
</span>
        
  <span class="article-category">
    <i class="icon icon-folder"></i>
    <a class="article-category-link" href="/bigbig-shark/categories/%E8%AE%A1%E7%AE%97%E6%9C%BA%E8%A7%86%E8%A7%89/">计算机视觉</a>
  </span>

        
  <span class="article-tag">
    <i class="icon icon-tags"></i>
	<a class="article-tag-link-link" href="/bigbig-shark/tags/%E7%9B%AE%E6%A0%87%E6%A3%80%E6%B5%8B/" rel="tag">目标检测</a>
  </span>


        

	<span class="article-read hidden-xs">
    	<i class="icon icon-eye-fill" aria-hidden="true"></i>
    	<span id="/bigbig-shark/2022/08/18/ObjectDetection-8-SSD/" class="leancloud_visitors"  data-flag-title="ObjectDetection(8)_SSD">
			<span class="leancloud-visitors-count">0</span>
		</span>
    </span>

        <span class="post-comment"><i class="icon icon-comment"></i> <a href="/bigbig-shark/2022/08/18/ObjectDetection-8-SSD/#comments" class="article-comment-link">Comments</a></span>
        
	
		<span class="post-wordcount hidden-xs" itemprop="wordCount">Word Count: 2.3k(words)</span>
	
	
		<span class="post-readcount hidden-xs" itemprop="timeRequired">Read Count: 9(minutes)</span>
	

      </div>
    </div>
    <div class="article-entry marked-body" itemprop="articleBody">
      
        <!-- toc -->
<ul>
<li><a href="#%E9%A2%84%E5%A4%87">预备</a></li>
<li><a href="#%E4%BB%80%E4%B9%88%E6%98%AFssd">什么是SSD</a></li>
<li><a href="#ssd%E7%9A%84%E6%A1%86%E6%9E%B6">SSD的框架</a>
<ul>
<li><a href="#%E7%90%86%E8%A7%A3%E7%9A%84%E5%85%B3%E9%94%AE">理解的关键</a>
<ul>
<li><a href="#default-boxes">Default Boxes</a></li>
<li><a href="#%E9%A2%84%E6%B5%8B%E6%A1%86%E5%86%85%E7%89%A9%E4%BD%93%E7%B1%BB%E5%88%AB%E5%92%8C%E6%A1%86%E4%BD%8D%E7%BD%AE">预测框内物体类别和框位置</a></li>
<li><a href="#%E4%B8%BA%E4%BB%80%E4%B9%88%E5%8F%AB%E5%81%9A%E5%A4%9A%E6%A1%86">为什么叫做多框</a></li>
</ul>
</li>
<li><a href="#%E5%9F%BA%E6%9E%B6">基架</a></li>
<li><a href="#%E6%B7%BB%E5%8A%A0">添加</a></li>
<li><a href="#%E6%95%B4%E4%BD%93%E7%BB%93%E6%9E%84">整体结构</a></li>
</ul>
</li>
<li><a href="#%E8%AE%AD%E7%BB%83%E6%97%B6%E7%9A%84%E9%83%A8%E5%88%86%E6%8E%AA%E6%96%BD">训练时的部分措施</a>
<ul>
<li><a href="#%E9%9A%BE%E4%BE%8B%E6%8C%96%E6%8E%98">难例挖掘</a></li>
<li><a href="#%E6%95%B0%E6%8D%AE%E5%A2%9E%E5%BC%BA">数据增强</a></li>
</ul>
</li>
<li><a href="#%E5%AE%9E%E9%AA%8C%E7%BB%93%E6%9E%9C">实验结果</a>
<ul>
<li><a href="#%E5%AE%9E%E9%AA%8C%E7%BB%93%E6%9E%9C-1">实验结果</a></li>
<li><a href="#%E5%AF%B9%E7%85%A7%E5%AE%9E%E9%AA%8C">对照实验</a></li>
</ul>
</li>
<li><a href="#%E9%A2%84%E6%B5%8B%E6%97%B6%E7%9A%84%E5%90%8E%E5%A4%84%E7%90%86">预测时的后处理</a></li>
<li><a href="#%E7%9B%B8%E5%85%B3%E5%B7%A5%E4%BD%9C">相关工作</a></li>
</ul>
<!-- tocstop -->
<h2><span id="预备">预备</span></h2>
<ul>
<li>FPS：Frames per Second，衡量预测速度，越大越好</li>
<li>coarse feature maps：粗略特征图</li>
<li>SSD300：指输入图片的分辨率是300 x 300</li>
<li>conv4_3：指第4个卷积块（block）的第3个卷积层（layer），例证详见<a target="_blank" rel="noopener" href="https://blog.csdn.net/m0_50652864/article/details/126413501?spm=1001.2014.3001.5501">文章</a></li>
<li>jaccard overlap：交并比</li>
</ul>
<p><img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/1.png" alt></p>
<h2><span id="什么是ssd">什么是SSD</span></h2>
<p>Single Shot MultiBox Detector，单发多框检测器，不需要region proposals，属于one-stage single network范畴。它可以实现实时检测，且能达到比Faster R-CNN更高的准确率和比YOLOV1更快的检测速度。具体精度（mAP）和速度（FPS）的比较如下图所示：</p>
<p><img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/3.png" alt></p>
<h2><span id="ssd的框架">SSD的框架</span></h2>
<h3><span id="理解的关键">理解的关键</span></h3>
<h4><span id="default-boxes">Default Boxes</span></h4>
<p>在卷积的过程中，我们会得到很多特征图（feature maps），而在某个特征图的某个位置上，以该位置为中心，可以选出有着不同宽高比（aspect ratio）和不同大小（scale）的框。这些框就叫做default boxes。</p>
<h4><span id="预测框内物体类别和框位置">预测框内物体类别和框位置</span></h4>
<p>在成功提取default boxes后，利用小卷积滤波器在每一个default box上预测类别得分和框的偏移。</p>
<h4><span id="为什么叫做多框">为什么叫做多框</span></h4>
<p>首先我们可以在卷积网络不同的出口得到不同大小的特征图，而在每个特征图的不同位置，都可以根据不同的宽高比和不同的大小来采集一系列固定大小的default boxes。然后在每个default box中，去预测框偏移和框内物体类别置信度（得分）。如下图所示：</p>
<p><img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/4.png" alt></p>
<p>训练时，首先需要将default boxes匹配上ground truth boxes。</p>
<h3><span id="基架">基架</span></h3>
<p>骨干网络是VGG16，然后在VGG16的基础上做了一些调整：</p>
<ol>
<li>用卷积层代替VGG16的fc6和fc7；</li>
<li>将pool5从2 x 2 - s2变成3 x 3 - s1（大小=3 x 3，stride=1）；</li>
<li>atrous algorithm to fill the “holes”（后面的对照实验表明，对精度而言可有可无；但对提升速度帮助很大）；</li>
<li>将VGG16中的所有dropout层和fc8都扔掉。</li>
</ol>
<h3><span id="添加">添加</span></h3>
<ol>
<li>
<p>Multi-scale feature maps，也就是将基架网络（base network）在最后截断，加上特征卷积层，得到不同大小的特征图；对于不同的特征层，用来预测的卷积滤波器是不同的（即各用各的）；所以可以预测不同大小的输入。</p>
</li>
<li>
<p>对于不同的特征层，用来预测的卷积滤波器是不同的（即各用各的）。Each added feature layer (or optionally an existing feature layer from the base network) can produce a fixed set of detection predictions using a set of convolutional filters. 而且用来预测得分和预测位置的滤波器也是不同的，若一个feature cell有k个default boxes，则一个feature cell总共有（4 + class_nums）* k个filters，而对m x n的特征图来说，则一共产生（4 + class_nums）* kmn个输出（滤波器在各个位置上共享，数量仍为（4 + class_nums）* k个）。</p>
<p>1、2两点如下图所示：</p>
<p><img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/5.png" alt></p>
</li>
</ol>
<h3><span id="整体结构">整体结构</span></h3>
<p><img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/6.png" alt></p>
<p>训练的时候需要进行ground truth boxes和default boxes的match，而预测时不需要。</p>
<h2><span id="训练时的部分措施">训练时的部分措施</span></h2>
<h3><span id="难例挖掘">难例挖掘</span></h3>
<p>Hard negative mining，直译为硬负挖掘，一般也称为难例挖掘。</p>
<p>本文使用该方法使正负样本之比为1: 3，以避免模型产生更愿意把样本预测成负样本的倾向。</p>
<p><a target="_blank" rel="noopener" href="https://blog.csdn.net/qq_36570733/article/details/83444245">难例挖掘理解</a></p>
<h3><span id="数据增强">数据增强</span></h3>
<p>为了增强模型鲁棒性，使模型能够应对各种大小和形状（宽高比）的图片，对训练集中的每一张图片都做下面三种处理中的任意一种：</p>
<ol>
<li>原图，即不做任何处理；</li>
<li>有条件采样，要求采样和图片中的物体的交并比不低于某个值（0.1，0.3，0.5，0.7或0.9）；</li>
<li>随机采样。</li>
</ol>
<p>总之，采样率在[0.1, 1]区间内，宽高比在[0.5, 2]，自然采样得到的图片大小不一、形状不一，所以在将其喂入模型前，还要做一些处理将它们变换成固定大小。值得注意的是，本文认为，<strong>如果某个ground truth box的中心被采样了，那么这个ground truth box和default boxes的重叠部分应该全部加入到采样中</strong>。</p>
<blockquote>
<p>在本文中，数据增强至关重要，在后面的对照实验中发现，在其他设置保持不变，仅仅扔掉数据增强后，mAP从74.3%降到了65.5%，非常惊人的数字。</p>
<p>此外，数据增强对SSD检测小物体也有很大的帮助，但实现这种效果的是使用了其他数据增强的手段：先把图片放大，再进行上述的三种处理之一，最终mAP提升2%~3%；与此同时付出的代价是，训练需要更多迭代次数。</p>
</blockquote>
<h2><span id="实验结果">实验结果</span></h2>
<h3><span id="实验结果-1">实验结果</span></h3>
<p>训练验证集（用来预训练模型）是PASCAL VOC2007trainval、PASCAL VOC2012trainval，以及COCOtrainval。</p>
<p>用来对比的模型是Fast RCNN和Faster RCNN，它们的输入图片分辨率最短边都在600及以上。</p>
<p>测试集是PASCAL VOC2007test。</p>
<p>mAP越高越好。</p>
<p><img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/imgs/SSDimg/2.png" alt="image-20220818211459650"></p>
<p>由上图可以看出：</p>
<ul>
<li>SSD在准确率上是好于两个RCNN模型的；</li>
<li>SSD的输入图片分辨率较大时，效果更好；</li>
<li>训练用数据集较大时，效果更好；</li>
<li>效果最好的是SSD512在07+12+COCO上的成果，mAP达到了81.6%（首先在COCO上进行预训练，然后在07和12上微调）。</li>
</ul>
<blockquote>
<p>为什么在COCO上训练，在07和12上微调？</p>
<p>很简单的道理，测试集是07啊，07和12又是同一类数据集！不管是实验，还是现实应用，肯定以目标领域来做最后的校准和评价嘛。</p>
</blockquote>
<p>其他数据集上的结果：仅仅是数据集和一些参数的改变，观测到的结果差不多，值得注意的一点是，在和Faster R-CNN做对比时，发现SSD在小物体的检测上确实赶不上Faster R-CNN，这可能和Faster R-CNN两步走（RPN和Fast R-CNN各做一次框回归）来提炼框框的方法有关；其他的不再啰嗦。</p>
<h3><span id="对照实验">对照实验</span></h3>
<p>对照实验原理不难理解，直接上结论：</p>
<ul>
<li>Data augmentation is crucial.</li>
<li>More default box shapes is better.</li>
<li>Atrous is faster.</li>
<li>Multiple output layers at different resolution is better.</li>
</ul>
<h2><span id="预测时的后处理">预测时的后处理</span></h2>
<p>非极大值抑制，可以简单地理解为将好的预测框挑出来，其他框全部扔掉。</p>
<h2><span id="相关工作">相关工作</span></h2>
<p>本文总结得很好，直接附过来！建议做到胸有成竹！任何地方有困惑都务必再去找到原文加深理解。</p>
<p>There are two established classes of methods for object detection in images, one based on sliding windows and the other based on region proposal classification. Before the advent of convolutional neural networks, the state of the art for those two approaches – Deformable Part Model (DPM) and Selective Search – had comparable performance. However, after the dramatic improvement brought on by R-CNN, which combines selective search region proposals and convolutional network based post-classification, region proposal object detection methods became prevalent. The original R-CNN approach has been improved in a variety of ways. The first set of approaches improve the quality and speed of post-classification, since it requires the classification of thousands of image crops, which is expensive and time-consuming. SPPnet speeds up the original R-CNN approach significantly. It introduces a spatial pyramid pooling layer that is more robust to region size and scale and allows the classification layers to reuse features computed over feature maps generated at several image resolutions. Fast R-CNN extends SPPnet so that it can fine-tune all layers end-to-end by minimizing a loss for both confidences and bounding box regression, which was first introduced in MultiBox for learning objectness.</p>
<p>The second set of approaches improve the quality of proposal generation using deep neural networks. In the most recent works like MultiBox, the Selective Search region proposals, which are based on low-level image features, are replaced by proposals generated directly from a separate deep neural network. This further improves the detection accuracy but results in a somewhat complex setup, requiring the training of two neural networks with a dependency between them. Faster R-CNN replaces selective search proposals by ones learned from a region proposal network (RPN), and introduces a method to integrate the RPN with Fast R-CNN by alternating between finetuning shared convolutional layers and prediction layers for these two networks. This way region proposals are used to pool mid-level features and the final classification step is less expensive. Our SSD is very similar to the region proposal network (RPN) in Faster R-CNN in that we also use a fixed set of (default) boxes for prediction, similar to the anchor boxes in the RPN. But instead of using these to pool features and evaluate another classifier, we simultaneously produce a score for each object category in each box. Thus, our approach avoids the complication of merging RPN with Fast R-CNN and is easier to train, faster, and straightforward to integrate in other tasks.</p>
<p>Another set of methods, which are directly related to our approach, skip the proposal step altogether and predict bounding boxes and confidences for multiple categories directly. OverFeat, a deep version of the sliding window method, predicts a bounding box directly from each location of the topmost feature map after knowing the confidences of the underlying object categories. YOLO uses the whole topmost feature map to predict both confidences for multiple categories and bounding boxes (which are shared for these categories). Our SSD method falls in this category because we do not have the proposal step but use the default boxes. However, our approach is more flexible than the existing methods because we can use default boxes of different aspect ratios on each feature location from multiple feature maps at different scales. If we only use one default box per location from the topmost feature map, our SSD would have similar architecture to OverFeat; if we use the whole topmost feature map and add a fully connected layer for predictions instead of our convolutional predictors, and do not explicitly consider multiple aspect ratios, we can approximately reproduce YOLO.</p>

      
    </div>
    <div class="article-footer">
      <blockquote class="mt-2x">
  <ul class="post-copyright list-unstyled">
    
    <li class="post-copyright-link hidden-xs">
      <strong>本文链接：</strong>
      <a href="http://tina-yao.gitee.io/bigbig-shark/2022/08/18/ObjectDetection-8-SSD/" title="ObjectDetection(8)_SSD" target="_blank" rel="external">http://tina-yao.gitee.io/bigbig-shark/2022/08/18/ObjectDetection-8-SSD/</a>
    </li>
    
    <li class="post-copyright-license">
      <strong>版权声明： </strong> 本博客所有文章除特别声明外，均采用 <a href="http://creativecommons.org/licenses/by/4.0/deed.zh" target="_blank" rel="external">CC BY 4.0 CN协议</a> 许可协议。转载请注明出处！
    </li>
  </ul>
</blockquote>


<div class="panel panel-default panel-badger">
  <div class="panel-body">
    <figure class="media">
      <div class="media-left">
        <a href="https://tina-yao.gitee.io/bigbig-shark/" target="_blank" class="img-burn thumb-sm visible-lg">
          <img src="/bigbig-shark/images/avatar.jpg" class="img-rounded w-full" alt="">
        </a>
      </div>
      <div class="media-body">
        <h3 class="media-heading"><a href="https://tina-yao.gitee.io/bigbig-shark/" target="_blank"><span class="text-dark">大鲨鱼</span><small class="ml-1x">CV&amp;Robots</small></a></h3>
        <div>格物致知，诚意力行。</div>
      </div>
    </figure>
  </div>
</div>


    </div>
  </article>
  
    
  <section id="comments">
  	
      <div id="vcomments"></div>
    
  </section>


  
</div>

  <nav class="bar bar-footer clearfix" data-stick-bottom>
  <div class="bar-inner">
  
  <ul class="pager pull-left">
    
    <li class="prev">
      <a href="/bigbig-shark/2022/08/20/ObjectDetection-9-YOLOV2/" title="ObjectDetection(9)_YOLOV2"><i class="icon icon-angle-left" aria-hidden="true"></i><span>&nbsp;&nbsp;Newer</span></a>
    </li>
    
    
    <li class="next">
      <a href="/bigbig-shark/2022/08/18/ObjectDetection-7-YOLOV1/" title="ObjectDetection(7)_YOLOV1"><span>Older&nbsp;&nbsp;</span><i class="icon icon-angle-right" aria-hidden="true"></i></a>
    </li>
    
    
  </ul>
  
  
  <!-- Button trigger modal -->
  <button type="button" class="btn btn-fancy btn-donate pop-onhover bg-gradient-warning" data-toggle="modal" data-target="#donateModal"><span>$</span></button>
  <!-- <div class="wave-icon wave-icon-danger btn-donate" data-toggle="modal" data-target="#donateModal">
    <div class="wave-circle"><span class="icon"><i class="icon icon-bill"></i></span></div>
  </div> -->
  
  
  <div class="bar-right">
    
    <div class="share-component" data-sites="weibo,qq,wechat" data-mobile-sites="weibo,qq"></div>
    
  </div>
  </div>
</nav>
  
<!-- Modal -->
<div class="modal modal-center modal-small modal-xs-full fade" id="donateModal" tabindex="-1" role="dialog">
  <div class="modal-dialog" role="document">
    <div class="modal-content donate">
      <button type="button" class="close" data-dismiss="modal" aria-label="Close"><span aria-hidden="true">&times;</span></button>
      <div class="modal-body">
        <div class="donate-box">
          <div class="donate-head">
            <p>Maybe you could buy me a cup of coffee.</p>
          </div>
          <div class="tab-content">
            <div role="tabpanel" class="tab-pane fade active in" id="alipay">
              <div class="donate-payimg">
                <img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/images/donate/alipayimg.PNG#images/donate/alipayimg.png" alt="Scan Qrcode" title="Scan" />
              </div>
              <p class="text-muted mv">Scan this qrcode</p>
              <p class="text-grey">Open alipay app scan this qrcode, buy me a coffee!</p>
            </div>
            <div role="tabpanel" class="tab-pane fade" id="wechatpay">
              <div class="donate-payimg">
                <img src="https://gitee.com/tina-yao/bigbig-shark/raw/master/images/donate/wechatpayimg.PNG#images/donate/wechatpayimg.png" alt="Scan Qrcode" title="Scan" />
              </div>
              <p class="text-muted mv">Scan this qrcode</p>
              <p class="text-grey">Open wechat app scan this qrcode, buy me a coffee!</p>
            </div>
          </div>
          <div class="donate-footer">
            <ul class="nav nav-tabs nav-justified" role="tablist">
              <li role="presentation" class="active">
                <a href="#alipay" id="alipay-tab" role="tab" data-toggle="tab" aria-controls="alipay" aria-expanded="true"><i class="icon icon-alipay"></i> alipay</a>
              </li>
              <li role="presentation" class="">
                <a href="#wechatpay" role="tab" id="wechatpay-tab" data-toggle="tab" aria-controls="wechatpay" aria-expanded="false"><i class="icon icon-wepay"></i> wechat payment</a>
              </li>
            </ul>
          </div>
        </div>
      </div>
    </div>
  </div>
</div>



</main>

  <footer class="footer" itemscope itemtype="http://schema.org/WPFooter">
	
	
    <ul class="social-links">
        <!-- Icon-only link: aria-label supplies the accessible name; title is kept for the data-toggle tooltip -->
        <li><a href="https://gitee.com/tina-yao" target="_blank" rel="noopener" title="Gitee" aria-label="Gitee" data-toggle="tooltip" data-placement="top"><i class="icon icon-gitee" aria-hidden="true"></i></a></li>
    </ul>

    <div class="copyright">
    	
        &copy; 2023 BigbigShark
        
        <div class="publishby">
        <!--
        	Theme by <a href="https://github.com/cofess" target="_blank"> cofess </a>base on <a href="https://github.com/cofess/hexo-theme-pure" target="_blank">pure</a>.
        -->
        </div>
    </div>
</footer>
  <!-- Load jQuery from the CDN over HTTPS; the inline script below falls back to a site-hosted copy if the CDN request failed. -->
  <script src="https://cdn.jsdelivr.net/npm/jquery@1.12.4/dist/jquery.min.js"></script>
<script>
// Root-absolute fallback path: a relative "js/jquery.min.js" would resolve against the
// post URL (/bigbig-shark/2022/08/18/.../) rather than the site root where the other
// scripts (/bigbig-shark/js/...) are served from.
window.jQuery || document.write('<script src="/bigbig-shark/js/jquery.min.js"><\/script>');
</script>

<script src="/bigbig-shark/js/plugin.min.js"></script>


<script src="/bigbig-shark/js/application.js"></script>


    <script>
// Publishes the search settings as window.INSIGHT_CONFIG.
// NOTE(review): presumably read by /bigbig-shark/js/insight.js, which is loaded right after — confirm.
(function (global) {
    // Display strings, keyed by content type.
    var labels = {
        POSTS: 'Posts',
        PAGES: 'Pages',
        CATEGORIES: 'Categories',
        TAGS: 'Tags',
        UNTITLED: '(Untitled)'
    };

    global.INSIGHT_CONFIG = {
        TRANSLATION: labels,
        ROOT_URL: '/bigbig-shark/',
        CONTENT_URL: '/bigbig-shark/content.json'
    };
}(window));
</script>

<script src="/bigbig-shark/js/insight.js"></script>






   




   
    
  <!-- LeanCloud SDK and the Valine comment widget, loaded over HTTPS. -->
  <!-- NOTE(review): the Valine URL carries no version, so whatever release the CDN currently serves is used; consider pinning one. -->
  <script src="https://cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/valine"></script>
  <script>
  // Commenter fields that may be requested; any other name in the configured list is dropped.
  var GUEST = ['nick', 'mail', 'link'];
  var meta = 'nick,mail,link';
  meta = meta.split(',').filter(function(item) {
    return GUEST.indexOf(item) > -1;
  });
  // Mount the comment widget on the #vcomments container.
  new Valine({
    el: '#vcomments',
    verify: false,
    notify: false,
    appId: '8pTCCvyyLGXskH0XW0fWHg7l-gzGzoHsz',
    appKey: 'id3rlCPRdbEoSpUfhJgcLhDO',
    placeholder: 'Just go go',
    avatar: 'mm',
    meta: meta,
    // Numeric page size; the previous expression ('10' || 10) always evaluated to the string '10'.
    pageSize: 10,
    visitor: true
  });
  </script>

     







<script src="/bigbig-shark/live2dw/lib/L2Dwidget.min.js?094cbace49a39548bed64abff5988b05"></script><script>L2Dwidget.init({"pluginModelPath":"assets/","model":{"jsonPath":"/bigbig-shark/live2dw/assets/tororo.model.json"},"display":{"position":null,"width":300,"height":600},"log":false,"pluginJsPath":"lib/","pluginRootPath":"live2dw/","tagMode":false});</script></body>
</html>